library(devtools)
install_github("wenweixiong/MARVEL")
install.packages("MARVEL")
library(MARVEL)
library(data.table) # Fast read-in of input files using fread() function
library(pdp) # Arranging output plots using grid.arrange() function
tran_id column of the relevant metadata. For detailed information of splicing nomenclature, please visit https://wenweixiong.github.io/Splicing_Nomenclature.CreateMarvelObject to create our S3 object. Arguments required are:SplicePheno Sample metadata. Mandatory columns are sample.id and cell.type.SpliceJunction Splice junction counts.SpliceFeature Splicing event metadata. Each element in the list represents a data frame corresponding to a specific splicing event type. Names of each element in the list should reflect the splicing event type, i.e. SE, MXE, RI, A5SS, and A3SS. Mandatory columns are tran_id and gene_id.# Read sample metadata file
path_to_file <- "Data/SJ_phenoData.txt"
df.pheno <- read.table(path_to_file, sep="\t", header=TRUE, stringsAsFactors=FALSE, na.strings="NA")
# Subset good-quality samples
df.pheno <- df.pheno[which(df.pheno$sample.type=="Single Cell" & df.pheno$qc.seq=="pass" & df.pheno$cell.type != "Unknown"), ]
df.pheno[1:5,]
## sample.id cell.type sample.type qc.seq
## 2 ERR1562084 iPSC Single Cell pass
## 3 ERR1562085 iPSC Single Cell pass
## 4 ERR1562086 iPSC Single Cell pass
## 5 ERR1562087 iPSC Single Cell pass
## 6 ERR1562088 iPSC Single Cell pass
# Read splice junction file
path_to_file <- "Data/SJ.txt"
sj <- as.data.frame(fread(path_to_file, sep="\t", header=TRUE, stringsAsFactors=FALSE, na.strings="NA"))
sj[7000:7005,1:5]
## coord.intron ERR1562083 ERR1562084 ERR1562085 ERR1562086
## 7000 chr1:114138054:114139614 1 5 1 5
## 7001 chr1:114139881:114142686 NA NA NA NA
## 7002 chr1:114139881:114149666 NA NA NA NA
## 7003 chr1:114139881:114152211 NA NA NA NA
## 7004 chr1:114139881:114152217 NA NA NA NA
## 7005 chr1:114139881:114152765 1 2 1 NA
# Read splicing event metadata files
event.types <- c("SE", "MXE", "RI", "A5SS", "A3SS")
df.feature.list <- list()
for(i in 1:length(event.types)) {
path_to_file <- paste("Data/", event.types[i], "_featureData.txt", sep="")
df.feature.list[[i]] <- read.table(path_to_file, sep="\t", header=TRUE, stringsAsFactors=FALSE, na.strings="NA")
}
names(df.feature.list) <- event.types
df.feature.list[[1]][1:5, ]
## tran_id
## 1 chrX:156022314:156022459:+@chrX:156022699:156022834:+@chrX:156023012:156023209
## 2 chrX:154227192:154227360:+@chrX:154228828:154228993:+@chrX:154230548:154230598
## 3 chrX:154190054:154190222:+@chrX:154191688:154191853:+@chrX:154193408:154193458
## 4 chrX:154152940:154153108:+@chrX:154154574:154154739:+@chrX:154156294:154156344
## 5 chrX:152918983:152919081:+@chrX:152920328:152920411:+@chrX:152920707:152920748
## gene_id gene_short_name gene_type
## 1 ENSG00000182484.15 WASH6P transcribed_unprocessed_pseudogene
## 2 ENSG00000166160.9 OPN1MW2 protein_coding
## 3 ENSG00000268221.5 OPN1MW protein_coding
## 4 ENSG00000102076.9 OPN1LW protein_coding
## 5 ENSG00000147394.18 ZNF185 protein_coding
# Create MARVEL object
marvel <- CreateMarvelObject(
SplicePheno=df.pheno, # Sample metadata
SpliceJunction=sj, # Splice junction counts
SpliceFeature=df.feature.list # Splicing event metadata
)
GenePheno Sample metadata. Mandatory columns are sample.id and cell.type.GeneFeature Gene metadata. Mandatory column is gene_id.Exp Normalised and log-transformed gene expression matrix.# Sample metadata
path_to_file <- "Data/TPM_phenoData.txt"
df.tpm.pheno <- read.table(path_to_file, sep="\t", header=TRUE, stringsAsFactors=FALSE)
df.tpm.pheno[1:5, ]
## sample.id cell.type sample.type qc.seq
## 1 ERR1562083 Unknown Single Cell pass
## 2 ERR1562084 iPSC Single Cell pass
## 3 ERR1562085 iPSC Single Cell pass
## 4 ERR1562086 iPSC Single Cell pass
## 5 ERR1562087 iPSC Single Cell pass
# Gene metadata
path_to_file <- "Data/TPM_featureData.txt"
df.tpm.feature <- read.table(path_to_file, sep="\t", header=TRUE, stringsAsFactors=FALSE)
df.tpm.feature[1:5, ]
## gene_id gene_short_name gene_type
## 1 ENSG00000000003.14 TSPAN6 protein_coding
## 2 ENSG00000000005.6 TNMD protein_coding
## 3 ENSG00000000419.12 DPM1 protein_coding
## 4 ENSG00000000457.14 SCYL3 protein_coding
## 5 ENSG00000000460.17 C1orf112 protein_coding
# Matrix
path_to_file <- "Data/TPM.txt"
df.tpm <- read.table(path_to_file, sep="\t", header=TRUE, stringsAsFactors=FALSE)
df.tpm[1:5,1:5]
## gene_id ERR1562083 ERR1562084 ERR1562085 ERR1562086
## 1 ENSG00000000003.14 343.26 163.45 210.43 190.46
## 2 ENSG00000000005.6 5.69 0.00 0.00 0.00
## 3 ENSG00000000419.12 288.02 155.26 42.49 238.67
## 4 ENSG00000000457.14 1.58 8.71 0.00 1.06
## 5 ENSG00000000460.17 41.23 28.53 100.17 66.43
# Subset good-quality samples
df.tpm.pheno <- df.tpm.pheno[which(df.tpm.pheno$qc.seq=="pass" & df.tpm.pheno$cell.type != "Unknown"), ]
# Transform values
df.tpm[,-1] <- log2(df.tpm[,-1] + 1)
# Set threshold for values
df.tpm[,-1][df.tpm[,-1] < 1] <- 0
# Create Marvel object
marvel <- CreateMarvelObject(
SplicePheno=df.pheno, # Sample metadata
SpliceJunction=sj, # Splice junction counts
SpliceFeature=df.feature.list, # Splicing event metadata
GenePheno=df.tpm.pheno, # Sample metadata
GeneFeature=df.tpm.feature, # Gene metadata
Exp=df.tpm # Gene expression matrix
)
CoverageThreshold argument, above which the splicing event is included for PSI calculation. For example, if the coverage threshold is set at 10, then MARVEL will only include the splicing event if all included or excluded junctions involved in calculating the splicing events are supported by at least 10 or more reads. NA in the PSI matrix returned are splicing events whose sample that did not have sufficient reads, i.e. lower number of reads than that specified by the user. The coverage threshold of 10 has been used multiple times in previous single-cell studies for selecting splicing events for downstream analysis (Song et al., 2017; Buen Abad Najar et al, 2020).marvel <- ComputePSI(MarvelObject=marvel, CoverageThreshold=10, EventType="SE")
## [1] "Analysing 51154 splicing events"
## [1] "20509 splicing events validated and quantified"
marvel$SpliceFeatureValidated$SE[1:5, ]
## tran_id
## 6 chrX:152922173:152922256:+@chrX:152922720:152922809:+@chrX:152928575:152928661
## 7 chrX:152922173:152922256:+@chrX:152922720:152922809:+@chrX:152931672:152931776
## 8 chrX:152922173:152922256:+@chrX:152928575:152928661:+@chrX:152931672:152931776
## 9 chrX:152922720:152922809:+@chrX:152928575:152928661:+@chrX:152931672:152931776
## 12 chrX:152932873:152932971:+@chrX:152936418:152936513:+@chrX:152938074:152938163
## event_type gene_id gene_short_name gene_type
## 6 SE ENSG00000147394.18 ZNF185 protein_coding
## 7 SE ENSG00000147394.18 ZNF185 protein_coding
## 8 SE ENSG00000147394.18 ZNF185 protein_coding
## 9 SE ENSG00000147394.18 ZNF185 protein_coding
## 12 SE ENSG00000147394.18 ZNF185 protein_coding
marvel$PSI$SE[5000:5005, 1:5]
## tran_id
## 5000 chr1:42693490:42693523:+@chr1:42696199:42696288:+@chr1:42696642:42696944
## 5001 chr1:160350105:160350250:+@chr1:160351222:160351372:+@chr1:160351696:160351729
## 5002 chr1:160350105:160350250:+@chr1:160351288:160351372:+@chr1:160351696:160351805
## 5003 chr1:160353160:160353237:+@chr1:160353323:160353501:+@chr1:160354118:160354290
## 5004 chr14:100376439:100376495:+@chr14:100380910:100381746:+@chr14:100468021:100468168
## 5005 chr14:100380910:100381746:+@chr14:100468021:100468168:+@chr14:100483994:100484124
## ERR1562083 ERR1562084 ERR1562085 ERR1562086
## 5000 1 1 NA 1
## 5001 NA NA NA 1
## 5002 NA NA NA NA
## 5003 NA NA 0 NA
## 5004 NA NA NA NA
## 5005 NA NA NA NA
marvel$SpliceFeatureValidated$SE and marvel$PSI$SE slot, respectively.marvel$SpliceFeatureValidated$SE[1:5, ]
## tran_id
## 6 chrX:152922173:152922256:+@chrX:152922720:152922809:+@chrX:152928575:152928661
## 7 chrX:152922173:152922256:+@chrX:152922720:152922809:+@chrX:152931672:152931776
## 8 chrX:152922173:152922256:+@chrX:152928575:152928661:+@chrX:152931672:152931776
## 9 chrX:152922720:152922809:+@chrX:152928575:152928661:+@chrX:152931672:152931776
## 12 chrX:152932873:152932971:+@chrX:152936418:152936513:+@chrX:152938074:152938163
## event_type gene_id gene_short_name gene_type
## 6 SE ENSG00000147394.18 ZNF185 protein_coding
## 7 SE ENSG00000147394.18 ZNF185 protein_coding
## 8 SE ENSG00000147394.18 ZNF185 protein_coding
## 9 SE ENSG00000147394.18 ZNF185 protein_coding
## 12 SE ENSG00000147394.18 ZNF185 protein_coding
marvel$PSI$SE[5000:5005, 1:5]
## tran_id
## 5000 chr1:42693490:42693523:+@chr1:42696199:42696288:+@chr1:42696642:42696944
## 5001 chr1:160350105:160350250:+@chr1:160351222:160351372:+@chr1:160351696:160351729
## 5002 chr1:160350105:160350250:+@chr1:160351288:160351372:+@chr1:160351696:160351805
## 5003 chr1:160353160:160353237:+@chr1:160353323:160353501:+@chr1:160354118:160354290
## 5004 chr14:100376439:100376495:+@chr14:100380910:100381746:+@chr14:100468021:100468168
## 5005 chr14:100380910:100381746:+@chr14:100468021:100468168:+@chr14:100483994:100484124
## ERR1562083 ERR1562084 ERR1562085 ERR1562086
## 5000 1 1 NA 1
## 5001 NA NA NA 1
## 5002 NA NA NA NA
## 5003 NA NA 0 NA
## 5004 NA NA NA NA
## 5005 NA NA NA NA
marvel <- ComputePSI(MarvelObject=marvel, CoverageThreshold=10, EventType="MXE")
## [1] "Analysing 4892 splicing events"
## [1] "1279 splicing events validated and quantified"
marvel$SpliceFeatureValidated$MXE and marvel$PSI$MXE slot, respectively.marvel <- ComputePSI(MarvelObject=marvel, CoverageThreshold=10, EventType="A5SS")
## [1] "Analysing 10464 splicing events"
## [1] "5163 splicing events validated and quantified"
marvel$SpliceFeatureValidated$A5SS and marvel$PSI$A5SS slot, respectively.marvel <- ComputePSI(MarvelObject=marvel, CoverageThreshold=10, EventType="A3SS")
## [1] "Analysing 13160 splicing events"
## [1] "5852 splicing events validated and quantified"
marvel$SpliceFeatureValidated$A3SS and marvel$PSI$A3SS slot, respectively.IntronCountsFile argument. Here, we used Bedtools to compute the total coverage for each intron (Quinlan & Hall, 2010).# Read per-base intron coverage file
path_to_file <- "Data/Counts_by_Region.txt"
df.intron.counts <- as.data.frame(fread(path_to_file, sep="\t", header=TRUE, stringsAsFactors=FALSE, na.strings="NA"))
df.intron.counts[1:5, 1:5]
## coord.intron ERR1562083 ERR1562084 ERR1562085 ERR1562086
## 1 chr1:13375:13452 0 0 0 0
## 2 chr1:146510:146641 0 150 0 129
## 3 chr1:498457:498683 678 0 0 0
## 4 chr1:764801:765143 5375 4544 6794 1591
## 5 chr1:804967:806385 794 0 0 0
# Compute PSI
marvel <- ComputePSI(MarvelObject=marvel, CoverageThreshold=10, IntronCountsFile=df.intron.counts, thread=4, EventType="RI")
## [1] "Analysing 11829 splicing events"
## [1] "8295 splicing events validated and quantified"
# Sample metadata
path_to_file <- "Data/SJ_phenoData.txt"
df.pheno <- read.table(path_to_file, sep="\t", header=TRUE, stringsAsFactors=FALSE, na.strings="NA")
# Subset good-quality samples
df.pheno <- df.pheno[which(df.pheno$sample.type=="Single Cell" & df.pheno$qc.seq=="pass" & df.pheno$cell.type != "Unknown"), ]
# List of validated splicing event metadata
event.types <- c("SE", "MXE", "RI", "A5SS", "A3SS")
df.feature.list <- list()
for(i in 1:length(event.types)) {
path_to_file <- paste("Data/", event.types[i], "_featureData_Validated.txt", sep="")
df.feature.list[[i]] <- read.table(path_to_file, sep="\t", header=TRUE, stringsAsFactors=FALSE, na.strings="NA")
}
names(df.feature.list) <- event.types
# List of pre-computed PSI matrices
event.types <- c("SE", "MXE", "RI", "A5SS", "A3SS")
df.list <- list()
for(i in 1:length(event.types)) {
path_to_file <- paste("Data/", event.types[i], ".txt", sep="")
df.list[[i]] <- read.table(path_to_file, sep="\t", header=TRUE, stringsAsFactors=FALSE, na.strings="NA")
}
names(df.list) <- event.types
# Create MARVEL object
marvel <- CreateMarvelObject(
SplicePheno=df.pheno, # Sample metadata
SpliceFeatureValidated=df.feature.list, # Validated splicing event metadata
PSI=df.list, # Pre-computed PSI matrices
GenePheno=df.tpm.pheno, # Sample metadata
GeneFeature=df.tpm.feature, # Gene metadata
Exp=df.tpm # Gene expression matrix
)
RunPCA function performs dimension reduction and visualizes these principle components. Indicate "gene" or "splicing" in the level argument for performing PCA on gene and splicing data, respectively. Results including the PCA coordinates and plots are returned in the marvel$PCA$Gene and marvel$PCA$PSI slot, respectively.# Differential gene expression analysis
marvel <- CompareValues(
MarvelObject=marvel, # Marvel object
cell.types=c("iPSC", "Endoderm"), # Cell types to include for analysis
n.cells=3, # Min. no. of cells expression value > 1 required
method="wilcox", # "wilcox"/"t.test"
method.adj="fdr", # Adjust for multiple testing as per p.adjust
level="gene" # "gene"/"splicing" data for DE analysis
)
## [1] "Checking... Matrix column (sample) names match sample metadata"
## [1] "Checking... Matrix row (feature) names match feature metadata"
marvel$DE$Gene[1:5, ]
## gene_id gene_short_name gene_type n.cells.g1 n.cells.g2
## 1 ENSG00000141448.10 GATA6 protein_coding 0 52
## 2 ENSG00000273706.4 LHX1 protein_coding 1 52
## 3 ENSG00000250361.8 GYPB protein_coding 3 52
## 4 ENSG00000266010.2 GATA6-AS1 lncRNA 1 51
## 5 ENSG00000158815.11 FGF17 protein_coding 0 50
## mean.g1 mean.g2 mean.fc p.val p.val.adj
## 1 0.00000000 6.051803 6.051803 3.364948e-28 6.476516e-24
## 2 0.02387774 6.346353 6.322476 7.065377e-28 6.799366e-24
## 3 0.06427675 12.180694 12.116417 2.412354e-27 1.547686e-23
## 4 0.01578723 7.528112 7.512325 3.652428e-27 1.757457e-23
## 5 0.00000000 5.835133 5.835133 9.209198e-27 2.743229e-23
# Reduce dimension using significant gene
# Subset gene IDs for PCA
features <- marvel$DE$Gene[which(marvel$DE$Gene$p.val.adj < 0.1), "gene_id"]
head(features)
## [1] "ENSG00000141448.10" "ENSG00000273706.4" "ENSG00000250361.8"
## [4] "ENSG00000266010.2" "ENSG00000158815.11" "ENSG00000185155.11"
# PCA
marvel <- RunPCA(
MarvelObject=marvel, # Marvel object
cell.types="all", # Cell types to include for analysis
n.cells=3, # Min. no. of cells expression value > 1 required
features=features, # gene_ids included for PCA
point.size=2.5, # Point size on PCA plot
level="gene" # "gene"/"splicing" data for PCA analysis
)
## [1] "Checking... Matrix column (sample) names match sample metadata"
## [1] "Checking... Matrix row (feature) names match feature metadata"
# View results
marvel$PCA$Gene$Results$ind$coord[1:5, 1:5]
## Dim.1 Dim.2 Dim.3 Dim.4 Dim.5
## ERR1562084 28.72190 -13.2616150 -9.25409430 0.8570145 -6.353654
## ERR1562085 32.27793 3.6945077 -0.03687886 6.1498344 -1.492075
## ERR1562086 38.68481 0.6185189 5.11077303 1.0221963 2.052011
## ERR1562087 29.56988 -14.9398291 -3.93151908 1.2784092 3.111345
## ERR1562088 25.36793 -8.0547843 -9.01760136 -0.1873535 -3.220140
marvel$PCA$Gene$Plot
# Reduce dimension using non-significant gene
# Subset gene IDs for PCA
features <- marvel$DE$Gene[which(marvel$DE$Gene$p.val.adj > 0.1), "gene_id"]
head(features)
## [1] "ENSG00000204682.7" "ENSG00000187607.16" "ENSG00000169220.18"
## [4] "ENSG00000103647.12" "ENSG00000091656.18" "ENSG00000276966.2"
# PCA
marvel <- RunPCA(
MarvelObject=marvel, # Marvel object
cell.types="all", # Cell types to include for analysis
n.cells=3, # Min. no. of cells expression value > 1 required
features=features, # gene_ids included for PCA
point.size=2.5, # Point size on PCA plot
level="gene" # "gene"/"splicing" data for PCA analysis
)
## [1] "Checking... Matrix column (sample) names match sample metadata"
## [1] "Checking... Matrix row (feature) names match feature metadata"
# View results
marvel$PCA$Gene$Results$ind$coord[1:5, 1:5]
## Dim.1 Dim.2 Dim.3 Dim.4 Dim.5
## ERR1562084 -0.6651731 -13.347873 2.6402208 -6.2685910 -4.4175709
## ERR1562085 2.9929379 3.110724 -1.4457918 -0.8673106 -5.6706971
## ERR1562086 9.2481014 1.836180 -0.1071698 1.2537718 -2.2324295
## ERR1562087 9.7771895 -12.554229 2.7098048 -3.9588311 0.2829091
## ERR1562088 -4.4391940 -8.811576 3.7719248 -2.4919996 -5.5988634
marvel$PCA$Gene$Plot
# Subset isoform IDs of non-DE genes for PCA
gene_ids <- marvel$DE$Gene[which(marvel$DE$Gene$p.val.adj > 0.1), "gene_id"]
features <- sapply(marvel$SpliceFeatureValidated, function(x) {
x[which(x$gene_id %in% gene_ids), "tran_id"]
})
features <- unlist(features)
# Reduce dimension
marvel <- RunPCA(
MarvelObject=marvel, # Marvel object
cell.types="all", # Cell types to include for analysis
n.cells=25, # Min. no. of cells PSI != NA required
features=features, # tran(script)_ids included for PCA
point.size=2.5, # Point size on PCA plot
level="splicing", # "gene"/"splicing" data for PCA analysis
event.type="SE", # Event type to include for PCA
seed=1 # Ensures imputed values for NA PSIs are reproducible
)
## [1] "Checking... Matrix column (sample) names match sample metadata"
## [1] "Checking... Matrix row (feature) names match feature metadata"
marvel$PCA$PSI$Results$ind$coord[1:5, 1:5]
## Dim.1 Dim.2 Dim.3 Dim.4 Dim.5
## ERR1562084 6.462428 -5.740567 -1.3002646 -0.982652292 0.5138300
## ERR1562085 -2.173295 -4.172514 -0.3197154 -0.007238071 -0.9104135
## ERR1562086 2.740285 -7.539178 -3.2230219 1.606161428 -1.5937867
## ERR1562087 -17.271833 -12.020245 0.7599505 -3.222076536 3.4147965
## ERR1562088 -8.451066 -8.758732 -4.1472342 2.676906340 -2.5721418
marvel$PCA$PSI$Plot
# Subset isoform IDs of non-DE genes for PCA
gene_ids <- marvel$DE$Gene[which(marvel$DE$Gene$p.val.adj > 0.1), "gene_id"]
features <- sapply(marvel$SpliceFeatureValidated, function(x) {
x[which(x$gene_id %in% gene_ids), "tran_id"]
})
features <- unlist(features)
# Reduce dimension
marvel <- RunPCA(
MarvelObject=marvel, # Marvel object
cell.types="all", # Cell types to include for analysis
n.cells=25, # Min. no. of cells PSI != NA required
features=features, # tran(script)_ids included for PCA
point.size=2.5, # Point size on PCA plot
level="splicing", # "gene"/"splicing" data for PCA analysis
event.type="MXE", # Event type to include for PCA
seed=1 # Ensures imputed values for NA PSIs are reproducible
)
## [1] "Checking... Matrix column (sample) names match sample metadata"
## [1] "Checking... Matrix row (feature) names match feature metadata"
marvel$PCA$PSI$Results$ind$coord[1:5, 1:5]
## Dim.1 Dim.2 Dim.3 Dim.4 Dim.5
## ERR1562084 -1.9252821 -0.6609580 1.0806099 0.1762628 2.5599164
## ERR1562085 0.1252907 1.7415093 -2.2802251 0.3772675 0.1540427
## ERR1562086 0.7735132 -0.6401495 -1.9448808 1.3244310 -0.1310056
## ERR1562087 1.2005549 1.1249511 0.9535323 -1.1103283 -0.4209977
## ERR1562088 1.0460318 1.9104356 0.8470174 0.5754857 2.2845207
marvel$PCA$PSI$Plot
# Subset isoform IDs of non-DE genes for PCA
gene_ids <- marvel$DE$Gene[which(marvel$DE$Gene$p.val.adj > 0.1), "gene_id"]
features <- sapply(marvel$SpliceFeatureValidated, function(x) {
x[which(x$gene_id %in% gene_ids), "tran_id"]
})
features <- unlist(features)
# Reduce dimension
marvel <- RunPCA(
MarvelObject=marvel, # Marvel object
cell.types="all", # Cell types to include for analysis
n.cells=25, # Min. no. of cells PSI != NA required
features=features, # tran(script)_ids included for PCA
point.size=2.5, # Point size on PCA plot
level="splicing", # "gene"/"splicing" data for PCA analysis
event.type="A5SS", # Event type to include for PCA
seed=1 # Ensures imputed values for NA PSIs are reproducible
)
## [1] "Checking... Matrix column (sample) names match sample metadata"
## [1] "Checking... Matrix row (feature) names match feature metadata"
marvel$PCA$PSI$Results$ind$coord[1:5, 1:5]
## Dim.1 Dim.2 Dim.3 Dim.4 Dim.5
## ERR1562084 -3.2185357 3.121685 3.2110691 -0.04375606 -1.0895440
## ERR1562085 -0.6064916 4.477277 2.0409785 1.81226973 -3.9889038
## ERR1562086 -0.5012800 3.076808 -1.8018215 -0.85649526 0.6499135
## ERR1562087 10.1921642 2.936643 -0.1557592 4.59691962 -2.1637503
## ERR1562088 4.5519736 7.768303 -3.3154547 0.47996198 -3.1550390
marvel$PCA$PSI$Plot
# Subset isoform IDs of non-DE genes for PCA
gene_ids <- marvel$DE$Gene[which(marvel$DE$Gene$p.val.adj > 0.1), "gene_id"]
features <- sapply(marvel$SpliceFeatureValidated, function(x) {
x[which(x$gene_id %in% gene_ids), "tran_id"]
})
features <- unlist(features)
# Reduce dimension
marvel <- RunPCA(
MarvelObject=marvel, # Marvel object
cell.types="all", # Cell types to include for analysis
n.cells=25, # Min. no. of cells PSI != NA required
features=features, # tran(script)_ids included for PCA
point.size=2.5, # Point size on PCA plot
level="splicing", # "gene"/"splicing" data for PCA analysis
event.type="A3SS", # Event type to include for PCA
seed=1 # Ensures imputed values for NA PSIs are reproducible
)
## [1] "Checking... Matrix column (sample) names match sample metadata"
## [1] "Checking... Matrix row (feature) names match feature metadata"
marvel$PCA$PSI$Results$ind$coord[1:5, 1:5]
## Dim.1 Dim.2 Dim.3 Dim.4 Dim.5
## ERR1562084 2.3302696 -6.599812 1.4856812 0.6651221 -1.325160
## ERR1562085 0.6428542 -3.622499 0.5552881 -4.0731481 -1.174348
## ERR1562086 1.2963986 -4.012958 -3.4076001 -1.6822685 1.433621
## ERR1562087 -10.8163600 -4.467695 -3.6371610 2.5905744 -0.800931
## ERR1562088 -5.8715941 -5.298387 -1.7304378 -4.6135847 -2.322378
marvel$PCA$PSI$Plot
# Subset isoform IDs of non-DE genes for PCA
gene_ids <- marvel$DE$Gene[which(marvel$DE$Gene$p.val.adj > 0.1), "gene_id"]
features <- sapply(marvel$SpliceFeatureValidated, function(x) {
x[which(x$gene_id %in% gene_ids), "tran_id"]
})
features <- unlist(features)
# Reduce dimension
marvel <- RunPCA(
MarvelObject=marvel, # Marvel object
cell.types="all", # Cell types to include for analysis
n.cells=25, # Min. no. of cells PSI != NA required
features=features, # tran(script)_ids included for PCA
point.size=2.5, # Point size on PCA plot
level="splicing", # "gene"/"splicing" data for PCA analysis
event.type="RI", # Event type to include for PCA
seed=1 # Ensures imputed values for NA PSIs are reproducible
)
## [1] "Checking... Matrix column (sample) names match sample metadata"
## [1] "Checking... Matrix row (feature) names match feature metadata"
marvel$PCA$PSI$Results$ind$coord[1:5, 1:5]
## Dim.1 Dim.2 Dim.3 Dim.4 Dim.5
## ERR1562084 -2.8302853 0.9679984 -2.4911417 -2.5107365 2.292951
## ERR1562085 0.7514041 6.3904454 0.4846107 0.9686218 2.492471
## ERR1562086 0.4608501 2.0604306 -3.2207402 1.0693625 6.186201
## ERR1562087 12.0350607 3.5838722 -6.2808539 0.7865100 6.097407
## ERR1562088 4.2465988 5.9785119 2.4254475 -1.8722169 2.497759
marvel$PCA$PSI$Plot
# Subset isoform IDs of non-DE genes for PCA
gene_ids <- marvel$DE$Gene[which(marvel$DE$Gene$p.val.adj > 0.1), "gene_id"]
features <- sapply(marvel$SpliceFeatureValidated, function(x) {
x[which(x$gene_id %in% gene_ids), "tran_id"]
})
features <- unlist(features)
# Reduce dimension
marvel <- RunPCA(
MarvelObject=marvel, # Marvel object
cell.types="all", # Cell types to include for analysis
n.cells=25, # Min. no. of cells PSI != NA required
features=features, # tran(script)_ids included for PCA
point.size=2.5, # Point size on PCA plot
level="splicing", # "gene"/"splicing" data for PCA analysis
event.type="all", # Event type to include for PCA
seed=1 # Ensures imputed values for NA PSIs are reproducible
)
## [1] "Checking... Matrix column (sample) names match sample metadata"
## [1] "Checking... Matrix row (feature) names match feature metadata"
marvel$PCA$PSI$Results$ind$coord[1:5, 1:5]
## Dim.1 Dim.2 Dim.3 Dim.4 Dim.5
## ERR1562084 -8.010368 7.741121 3.8774446 1.6573993 -3.2443932
## ERR1562085 3.077964 11.015736 -4.7424458 -1.2223538 -1.6942337
## ERR1562086 -2.312093 12.454105 3.6929087 -2.3593765 -0.7986274
## ERR1562087 27.110337 12.218401 7.9632496 10.4572689 4.2847093
## ERR1562088 12.227947 17.655181 0.1003771 -0.2459062 3.6043105
marvel$PCA$PSI$Plot
AssignModality function assigns modality for each splicing event. Briefly, the maximum likelihood estimation is used to derive the alpha and beta parameters of the beta distribution to stratify each splicing event into one of the modalities.PropModality function tabulates and plots the proportion of each modality type for a given splicing event type or across splicing event types. Results are returned in marvel$Modality$Prop slot.# Assign modalities
marvel <- AssignModality(
MarvelObject=marvel, # Marvel object
cell.type="iPSC", # Cell type to include for analysis
n.cells=25, # Min. no. of cells PSI != NA required
sigma.sq=0.001, # Variance value below which sub-modality is primary,
# above which sub-modality is dispersed
bimodal.adjust=TRUE, # Detect and rectify false bimodals
seed=1 # Ensures MLE model returns reproducible parameter values
)
## [1] "Checking... Matrix column (sample) names match sample metadata"
## [1] "Checking... Matrix row (feature) names match feature metadata"
marvel$Modality$Results[1:5, ]
## tran_id
## 1 chrX:155090784:155090839:+@chrX:155099315:155099389:+@chrX:155116057:155116188
## 2 chrX:155090784:155090839:+@chrX:155116057:155116188:+@chrX:155116711:155116754
## 3 chrX:155033403:155033553:+@chrX:155046509:155046584:+@chrX:155051670:155051801
## 4 chrX:155046509:155046584:+@chrX:155047369:155047391:+@chrX:155051670:155051801
## 5 chrX:154399338:154399396:+@chrX:154399487:154399594:+@chrX:154399803:154399941
## event_type gene_id gene_short_name gene_type n.cells
## 1 SE ENSG00000185515.14 BRCC3 protein_coding 43
## 2 SE ENSG00000185515.14 BRCC3 protein_coding 35
## 3 SE ENSG00000165775.18 FUNDC2 protein_coding 63
## 4 SE ENSG00000165775.18 FUNDC2 protein_coding 67
## 5 SE ENSG00000147403.16 RPL10 protein_coding 83
## alpha beta modality modality.var modality.bimodal.adj
## 1 0.3248933 2.2496947 Bimodal Bimodal Excluded.Dispersed
## 2 186.8246753 1.8538713 Included Included.Primary Included.Primary
## 3 45.0932034 0.7900596 Included Included.Dispersed Included.Dispersed
## 4 2.4134211 233.6340402 Excluded Excluded.Primary Excluded.Primary
## 5 237.6961900 2.2802562 Included Included.Primary Included.Primary
n.cells column indicates the no. of cells expressing the splicing event.modality column represents the 5 basic modalities.modality.var column represents the extended modalities as proposed by us.modality.bimodal.adj column represents the extended modalities whose false bimodals have been identified and reclassified into either included or excluded modalities.# Plot basic modalities
marvel <- PropModality(
MarvelObject=marvel, # Marvel object from AssignModality function
modality.column="modality.bimodal.adj", # Modality column to tabulate proportion
modality.type="basic", # Five original modalities by Song (Mol Cell,2017)
event.type="all", # Event type(s) to include for analysis
across.event.type=FALSE # Do not compare proportion across event types
)
plot.1 <- marvel$Modality$Prop$DoughnutChart$Plot
# Plot extended modalities
marvel <- PropModality(
MarvelObject=marvel,
modality.column="modality.bimodal.adj",
modality.type="extended", # Extended modalities by us
event.type="all",
across.event.type=FALSE
)
plot.2 <- marvel$Modality$Prop$DoughnutChart$Plot
grid.arrange(plot.1, plot.2, nrow=1)
marvel <- PropModality(
MarvelObject=marvel, # Marvel object from AssignModality function
modality.column="modality.bimodal.adj", # Modality column to tabulate proportion
modality.type="basic", # Five original modalities by Song (Mol Cell,2017)
event.type="all", # Event type(s) to include for analysis
across.event.type=TRUE, # Compare proportion across event types
prop.test="chisq", # Statistical test: "chisq"/"fisher"
prop.adj="fdr" # Adjust for multiple testing as per p.adjust
)
marvel$Modality$Prop$BarChart$Plot
marvel$Modality$Prop$BarChart$Test
## modality p.val p.val.adj
## 1 Included 3.233246e-165 1.616623e-164
## 2 Excluded 3.680623e-131 9.201558e-131
## 3 Bimodal 1.280179e-03 1.280179e-03
## 4 Middle 3.110818e-19 5.184696e-19
## 5 Multimodal 2.664265e-08 3.330331e-08
# Assign modalities
marvel <- AssignModality(
MarvelObject=marvel, # Marvel object
cell.type="Endoderm",# Cell type to include for analysis
n.cells=25, # Min. no. of cells PSI != NA required
sigma.sq=0.001, # Variance value below which sub-modality is primary,
# above which sub-modality is dispersed
bimodal.adjust=TRUE, # Detect and rectify false bimodals
seed=1 # Ensures MLE model returns reproducible parameter values
)
## [1] "Checking... Matrix column (sample) names match sample metadata"
## [1] "Checking... Matrix row (feature) names match feature metadata"
marvel$Modality$Results[1:5, ]
## tran_id
## 1 chrX:155033403:155033553:+@chrX:155046509:155046584:+@chrX:155051670:155051801
## 2 chrX:155046509:155046584:+@chrX:155047369:155047391:+@chrX:155051670:155051801
## 3 chrX:154399338:154399396:+@chrX:154399487:154399594:+@chrX:154399803:154399941
## 4 chrX:154399803:154399941:+@chrX:154400464:154400626:+@chrX:154400702:154402339
## 5 chrX:154379197:154379566:+@chrX:154379690:154379794:+@chrX:154379942:154380019
## event_type gene_id gene_short_name gene_type n.cells
## 1 SE ENSG00000165775.18 FUNDC2 protein_coding 28
## 2 SE ENSG00000165775.18 FUNDC2 protein_coding 31
## 3 SE ENSG00000147403.16 RPL10 protein_coding 53
## 4 SE ENSG00000147403.16 RPL10 protein_coding 53
## 5 SE ENSG00000102119.10 EMD protein_coding 30
## alpha beta modality modality.var modality.bimodal.adj
## 1 44.2077643 0.7706464 Included Included.Dispersed Included.Dispersed
## 2 0.2627077 2.1244924 Bimodal Bimodal Excluded.Dispersed
## 3 233.6663960 2.2003259 Included Included.Primary Included.Primary
## 4 39.3616462 0.9308145 Included Included.Primary Included.Primary
## 5 170.2131013 1.6925857 Included Included.Primary Included.Primary
n.cells column indicates the no. of cells expressing the splicing event.modality column represents the 5 basic modalities.modality.var column represents the extended modalities as proposed by us.modality.bimodal.adj column represents the extended modalities whose false bimodals have been identified and reclassified into either included or excluded modalities.# Plot basic modalities
marvel <- PropModality(
MarvelObject=marvel, # Marvel object from AssignModality function
modality.column="modality.bimodal.adj", # Modality column to tabulate proportion
modality.type="basic", # Five original modalities by Song (Mol Cell,2017)
event.type="all", # Event type(s) to include for analysis
across.event.type=FALSE # Do not compare proportion across event types
)
plot.1 <- marvel$Modality$Prop$DoughnutChart$Plot
# Plot extended modalities
marvel <- PropModality(
MarvelObject=marvel,
modality.column="modality.bimodal.adj",
modality.type="extended", # Extended modalities by us
event.type="all",
across.event.type=FALSE
)
plot.2 <- marvel$Modality$Prop$DoughnutChart$Plot
grid.arrange(plot.1, plot.2, nrow=1)
marvel <- PropModality(
MarvelObject=marvel, # Marvel object from AssignModality function
modality.column="modality.bimodal.adj", # Modality column to tabulate proportion
modality.type="basic", # Five original modalities by Song (Mol Cell,2017)
event.type="all", # Event type(s) to include for analysis
across.event.type=TRUE, # Compare proportion across event types
prop.test="chisq", # Statistical test: "chisq"/"fisher"
prop.adj="fdr" # Adjust for multiple testing as per p.adjust
)
marvel$Modality$Prop$BarChart$Plot
marvel$Modality$Prop$BarChart$Test
## modality p.val p.val.adj
## 1 Included 5.954299e-75 2.977150e-74
## 2 Excluded 7.014251e-57 1.753563e-56
## 3 Bimodal 1.578443e-03 1.578443e-03
## 4 Middle 5.254774e-09 8.757957e-09
## 5 Multimodal 2.554295e-05 3.192868e-05
CompareValues function together with level=splicing argumennt. Results are returned in the marvel$DE$PSI slotmarvel <- CompareValues(
MarvelObject=marvel, # Marvel object
cell.types=c("iPSC", "Endoderm"), # Cell types to analyse
n.cells=25, # Min. no. of cells PSI != NA required
method="ks", # "ks"/"wilcox"/"t.test"
method.adj="fdr", # Adjust for multiple testing as per p.adjust
level="splicing" # "gene"/"splicing" data to analyse
)
## [1] "Checking... Matrix column (sample) names match sample metadata"
## [1] "Checking... Matrix row (feature) names match feature metadata"
marvel$DE$PSI[1:5, ]
## tran_id
## 1 chr15:24962114:24962209:+@chr15:24967029:24967152:+@chr15:24967932:24968082
## 2 chr15:24962114:24962209:+@chr15:24967029:24967240:+@chr15:24967932:24968082
## 3 chr10:78037194:78037304:+@chr10:78037439:78037441:+@chr10:78040204:78040225
## 4 chr13:43059394:43059714:+@chr13:43062190:43062295
## 5 chr3:197950190:197950221|197950299:+@chr3:197950936:197950978
## event_type gene_id gene_short_name gene_type n.cells.g1
## 1 SE ENSG00000128739.22 SNRPN protein_coding 83
## 2 SE ENSG00000128739.22 SNRPN protein_coding 83
## 3 SE ENSG00000138326.20 RPS24 protein_coding 83
## 4 RI ENSG00000120675.6 DNAJC15 protein_coding 67
## 5 A5SS ENSG00000182899.17 RPL35A protein_coding 83
## n.cells.g2 mean.g1 mean.g2 mean.diff p.val p.val.adj
## 1 53 0.1003557547 0.009585074 -0.09077068 0 0
## 2 53 0.0655082154 0.011043286 -0.05446493 0 0
## 3 53 0.1293116116 0.013734360 -0.11557725 0 0
## 4 53 0.0001407511 0.122010454 0.12186970 0 0
## 5 53 0.1930818781 0.054302037 -0.13877984 0 0
BioPathways function.marvel <- BioPathways(
MarvelObject=marvel, # Marvel object from CompareValues function()
psi.de.sig=0.05, # Adjusted p-value below which splicing event
# considered differentially spliced
method.adjust="fdr", # Adjust for multiple testing as per p.adjust
min.genes=10, # Min.no.of differentially spliced genes required
p.val.adj.return=0.05, # Return pathways with adjusted p-value below this value
plot.top.n=10 # Plot these top most significant pathways
)
marvel$DE$BioPathways[1:5,]
## GOBPID Pvalue OddsRatio ExpCount Count Size
## 1 GO:0006413 4.449963e-10 4.549807 17.58498 42 77
## 2 GO:0000184 4.463616e-09 4.025353 18.59510 42 81
## 3 GO:0019083 9.978636e-09 4.024699 17.66741 40 77
## 4 GO:0006614 1.765970e-08 3.898635 17.93769 40 78
## 5 GO:0072594 3.213229e-07 2.540323 35.64540 62 155
## Term
## 1 translational initiation
## 2 nuclear-transcribed mRNA catabolic process, nonsense-mediated decay
## 3 viral transcription
## 4 SRP-dependent cotranslational protein targeting to membrane
## 5 establishment of protein localization to organelle
## p.val.adj
## 1 1.363024e-06
## 2 6.836029e-06
## 3 1.018819e-05
## 4 1.352291e-05
## 5 1.931496e-04
## genes.hit
## 1 EIF4A1|EIF4A2|EIF4G2|EIF6|RPL10A|NPM1|POLR2G|RPL3|RPL5|RPL7A|RPL8|RPL9|RPL10|RPL13|RPL17|RPL18|RPL21|RPL24|RPL26|RPL30|RPL31|RPL35A|RPL37A|RPL38|RPL39|RPL41|RPL36A|RPLP0|RPLP2|RPS2|RPS6|RPS11|RPS13|RPS15A|RPS16|RPS17|RPS19|RPS20|RPS21|RPS24|RPS25|RPS27A|RPS28|RPS29|UBA52|EIF4H|DENR|CDC123|RPL35|PABPC1|EIF3K
## 2 RPL10A|RPL3|RPL5|RPL7A|RPL8|RPL9|RPL10|RPL13|RPL17|RPL18|RPL21|RPL24|RPL26|RPL30|RPL31|RPL35A|RPL37A|RPL38|RPL39|RPL41|RPL36A|RPLP0|RPLP2|RPS2|RPS6|RPS11|RPS13|RPS15A|RPS16|RPS17|RPS19|RPS20|RPS21|RPS24|RPS25|RPS27A|RPS28|RPS29|UBA52|RBM8A|RPL35|PABPC1|MAGOHB
## 3 RPL10A|POLR2G|POLR2H|RPL3|RPL5|RPL7A|RPL8|RPL9|RPL10|RPL13|RPL17|RPL18|RPL21|RPL24|RPL26|RPL30|RPL31|RPL35A|RPL37A|RPL38|RPL39|RPL41|RPL36A|RPLP0|RPLP2|RPS2|RPS6|RPS11|RPS13|RPS15A|RPS16|RPS17|RPS19|RPS20|RPS21|RPS24|RPS25|RPS27A|RPS28|RPS29|SUPT4H1|UBA52|RPL35
## 4 RPL10A|RPL3|RPL5|RPL7A|RPL8|RPL9|RPL10|RPL13|RPL17|RPL18|RPL21|RPL24|RPL26|RPL30|RPL31|RPL35A|RPL37A|RPL38|RPL39|RPL41|RPL36A|RPLP0|RPLP2|RPS2|RPS6|RPS11|RPS13|RPS15A|RPS16|RPS17|RPS19|RPS20|RPS21|RPS24|RPS25|RPS27A|RPS28|RPS29|UBA52|RPL35
## 5 CCT6A|HSPA4|IDH1|TNPO1|RPL10A|RAN|RPL3|RPL5|RPL7A|RPL8|RPL9|RPL10|RPL13|RPL17|RPL18|RPL21|RPL24|RPL26|RPL30|RPL31|RPL35A|RPL37A|RPL38|RPL39|RPL41|RPL36A|RPLP0|RPLP2|RPS2|RPS6|RPS11|RPS13|RPS15A|RPS16|RPS17|RPS19|RPS20|RPS21|RPS24|RPS25|RPS27A|RPS28|RPS29|TERF1|UBA52|UBE2D3|YWHAB|YWHAE|YWHAZ|USP9X|RPL35|SEC61G|TIMM8B|DNAJC15|FIS1|TOMM7|UBL5|RPAIN|ATP5IF1|DNAJC19|ROMO1|TOMM5
marvel$DE$BioPathwaysPlot
IsoSwitch function for this purpose. Results are returned in marvel$DE$Cor* slot.marvel <- IsoSwitch(
MarvelObject=marvel, # Marvel object from CompareValues function()
psi.de.sig=0.05, # Adjusted p-value below which the splicing
# event is considered differentially spliced
cell.types=c("iPSC", "Endoderm"), # Cell types to analyse
method="wilcox", # Statistical test for differential gene
# expression analysis
method.adj="fdr", # Adjust for multiple testing as per p.adjust
gene.de.sig=0.05 # Adjusted p-value below which the gene
# is considered differentially expressed
)
## [1] "Checking... Matrix column (sample) names match sample metadata"
## [1] "Checking... Matrix row (feature) names match feature metadata"
marvel$DE$Cor[1:5, ]
## tran_id
## 1 chr15:24962114:24962209:+@chr15:24967029:24967152:+@chr15:24967932:24968082
## 2 chr15:24962114:24962209:+@chr15:24967029:24967240:+@chr15:24967932:24968082
## 3 chr10:78037194:78037304:+@chr10:78037439:78037441:+@chr10:78040204:78040225
## 4 chr13:43059394:43059714:+@chr13:43062190:43062295
## 5 chr3:197950190:197950221|197950299:+@chr3:197950936:197950978
## event_type gene_id gene_short_name gene_type n.cells.g1
## 1 SE ENSG00000128739.22 SNRPN protein_coding 83
## 2 SE ENSG00000128739.22 SNRPN protein_coding 83
## 3 SE ENSG00000138326.20 RPS24 protein_coding 83
## 4 RI ENSG00000120675.6 DNAJC15 protein_coding 67
## 5 A5SS ENSG00000182899.17 RPL35A protein_coding 83
## n.cells.g2 mean.g1 mean.g2 mean.diff p.val p.val.adj
## 1 53 0.1003557547 0.009585074 -0.09077068 0 0
## 2 53 0.0655082154 0.011043286 -0.05446493 0 0
## 3 53 0.1293116116 0.013734360 -0.11557725 0 0
## 4 53 0.0001407511 0.122010454 0.12186970 0 0
## 5 53 0.1930818781 0.054302037 -0.13877984 0 0
## psi.gene.cor
## 1 Mixed
## 2 Mixed
## 3 Mixed
## 4 Same direction
## 5 Iso-switch
marvel$DE$CorPlot
marvel$DE$CorProp
## psi.gene.cor freq pct
## 4 Same direction 203 40.11858
## 3 Opposite direction 88 17.39130
## 1 Iso-switch 114 22.52964
## 2 Mixed 101 19.96047
# Example of a gene with isoform switch
. <- unique(marvel$DE$Cor[which(marvel$DE$Cor$psi.gene.cor=="Iso-switch"), ])
gene_id <- .$gene_id[1]
tran_id <- .$tran_id[1]
marvel <- PlotValues(
MarvelObject=marvel, # Marvel object from CompareValues function()
cell.types=c("iPSC", "Endoderm"), # Cell types to plot
feature=gene_id, # gene_id to plot
maintitle="gene_short_name", # Column to use as plot title as per GeneFeature
level="gene" # "gene"/"splicing" data to plot
)
plot.1 <- marvel$adhocPlot$Gene
marvel <- PlotValues(
MarvelObject=marvel, # Marvel object from CompareValues function()
cell.types=c("iPSC", "Endoderm"), # Cell types to plot
feature=tran_id, # tran(script)_id to plot
maintitle="gene_short_name", # Column to use as plot title as per ValidatedSpliceFeature
xlabels.size=9, # Font size for x-axis labels
level="splicing", # "gene"/"splicing" data to plot
n.cells=25, # For modality assignment (see AssignModality function above)
sigma.sq=0.001,
bimodal.adjust=TRUE,
seed=1,
modality.column="modality.bimodal.adj"
)
## [1] "Checking... Matrix column (sample) names match sample metadata"
## [1] "Checking... Matrix row (feature) names match feature metadata"
## [1] "Checking... Matrix column (sample) names match sample metadata"
## [1] "Checking... Matrix row (feature) names match feature metadata"
plot.2 <- marvel$adhocPlot$PSI
grid.arrange(plot.1, plot.2, nrow=1)
# Example of a gene whose mean changes in the SAME direction as mean PSI value
. <- unique(marvel$DE$Cor[which(marvel$DE$Cor$psi.gene.cor=="Same direction"), ])
gene_id <- .$gene_id[1]
tran_id <- .$tran_id[1]
marvel <- PlotValues(
MarvelObject=marvel, # Marvel object from CompareValues function()
cell.types=c("iPSC", "Endoderm"), # Cell types to plot
feature=gene_id, # gene_id to plot
maintitle="gene_short_name", # Column to use as plot title as per GeneFeature
level="gene" # "gene"/"splicing" data to plot
)
plot.1 <- marvel$adhocPlot$Gene
marvel <- PlotValues(
MarvelObject=marvel, # Marvel object from CompareValues function()
cell.types=c("iPSC", "Endoderm"), # Cell types to plot
feature=tran_id, # tran(script)_id to plot
maintitle="gene_short_name", # Column to use as plot title as per ValidatedSpliceFeature
xlabels.size=9, # Font size for x-axis labels
level="splicing", # "gene"/"splicing" data to plot
n.cells=25, # For modality assignment (see AssignModality function above)
sigma.sq=0.001,
bimodal.adjust=TRUE,
seed=1,
modality.column="modality.bimodal.adj"
)
## [1] "Checking... Matrix column (sample) names match sample metadata"
## [1] "Checking... Matrix row (feature) names match feature metadata"
## [1] "Checking... Matrix column (sample) names match sample metadata"
## [1] "Checking... Matrix row (feature) names match feature metadata"
plot.2 <- marvel$adhocPlot$PSI
grid.arrange(plot.1, plot.2, nrow=1)
# Example of a gene whose mean changes in the OPPOSITE direction as mean PSI value
. <- unique(marvel$DE$Cor[which(marvel$DE$Cor$psi.gene.cor=="Opposite direction"), ])
gene_id <- .$gene_id[1]
tran_id <- .$tran_id[1]
marvel <- PlotValues(
MarvelObject=marvel, # Marvel object from CompareValues function()
cell.types=c("iPSC", "Endoderm"), # Cell types to plot
feature=gene_id, # gene_id to plot
maintitle="gene_short_name", # Column to use as plot title as per GeneFeature
level="gene" # "gene"/"splicing" data to plot
)
plot.1 <- marvel$adhocPlot$Gene
marvel <- PlotValues(
MarvelObject=marvel, # Marvel object from CompareValues function()
cell.types=c("iPSC", "Endoderm"), # Cell types to plot
feature=tran_id, # tran(script)_id to plot
maintitle="gene_short_name", # Column to use as plot title as per ValidatedSpliceFeature
xlabels.size=9, # Font size for x-axis labels
level="splicing", # "gene"/"splicing" data to plot
n.cells=25, # For modality assignment (see AssignModality function above)
sigma.sq=0.001,
bimodal.adjust=TRUE,
seed=1,
modality.column="modality.bimodal.adj"
)
## [1] "Checking... Matrix column (sample) names match sample metadata"
## [1] "Checking... Matrix row (feature) names match feature metadata"
## [1] "Checking... Matrix column (sample) names match sample metadata"
## [1] "Checking... Matrix row (feature) names match feature metadata"
plot.2 <- marvel$adhocPlot$PSI
grid.arrange(plot.1, plot.2, nrow=1)
ModalityChange function for this purpose.marvel <- ModalityChange(
MarvelObject=marvel, # Marvel object from CompareValues function()
psi.de.sig=0.05, # Adjusted p-value below which the splicing
# event is considered differentially spliced
cell.types=c("iPSC", "Endoderm"), # Cell types to analyse
n.cells=25, # For modality assignment (see AssignModality function above)
sigma.sq=0.001,
bimodal.adjust=TRUE,
seed=1,
modality.column="modality.bimodal.adj" #
)
## [1] "Checking... Matrix column (sample) names match sample metadata"
## [1] "Checking... Matrix row (feature) names match feature metadata"
## [1] "Checking... Matrix column (sample) names match sample metadata"
## [1] "Checking... Matrix row (feature) names match feature metadata"
marvel$DE$ModalityProp
## modality.change freq pct
## 1 Explicit 65 12.84585
## 2 Implicit 75 14.82213
## 3 Restricted 366 72.33202
marvel$DE$ModalityPlot
# Example of explicit modality change
tran_ids <- marvel$DE$Modality[which(marvel$DE$Modality["modality.change"]=="Explicit"), "tran_id"]
marvel <- PlotValues(
MarvelObject=marvel, # Marvel object from CompareValues function()
cell.types=c("iPSC", "Endoderm"), # Cell types to plot
feature=tran_ids[1], # tran(script)_ids to plot
maintitle="gene_short_name", # Column to use as plot title as per ValidatedSpliceFeature
xlabels.size=5, # Font size for x-axis labels
level="splicing", # "gene"/"splicing" data to plot
n.cells=25, # For modality assignment (see AssignModality function above)
sigma.sq=0.001,
bimodal.adjust=TRUE,
seed=1,
modality.column="modality.bimodal.adj"
)
## [1] "Checking... Matrix column (sample) names match sample metadata"
## [1] "Checking... Matrix row (feature) names match feature metadata"
## [1] "Checking... Matrix column (sample) names match sample metadata"
## [1] "Checking... Matrix row (feature) names match feature metadata"
plot.1 <- marvel$adhocPlot$PSI
# Example of implicit modality change
tran_ids <- marvel$DE$Modality[which(marvel$DE$Modality["modality.change"]=="Implicit"), "tran_id"]
marvel <- PlotValues(
MarvelObject=marvel, # Marvel object from CompareValues function()
cell.types=c("iPSC", "Endoderm"), # Cell types to plot
feature=tran_ids[1], # tran(script)_ids to plot
maintitle="gene_short_name", # Column to use as plot title as per ValidatedSpliceFeature
xlabels.size=5, # Font size for x-axis labels
level="splicing", # "gene"/"splicing" data to plot
n.cells=25, # For modality assignment (see AssignModality function above)
sigma.sq=0.001,
bimodal.adjust=TRUE,
seed=1,
modality.column="modality.bimodal.adj"
)
## [1] "Checking... Matrix column (sample) names match sample metadata"
## [1] "Checking... Matrix row (feature) names match feature metadata"
## [1] "Checking... Matrix column (sample) names match sample metadata"
## [1] "Checking... Matrix row (feature) names match feature metadata"
plot.2 <- marvel$adhocPlot$PSI
# Example of restricted modality change
tran_ids <- marvel$DE$Modality[which(marvel$DE$Modality["modality.change"]=="Restricted"), "tran_id"]
marvel <- PlotValues(
MarvelObject=marvel, # Marvel object from CompareValues function()
cell.types=c("iPSC", "Endoderm"), # Cell types to plot
feature=tran_ids[3], # tran(script)_ids to plot
maintitle="gene_short_name", # Column to use as plot title as per ValidatedSpliceFeature
xlabels.size=5, # Font size for x-axis labels
level="splicing", # "gene"/"splicing" data to plot
n.cells=25, # For modality assignment (see AssignModality function above)
sigma.sq=0.001,
bimodal.adjust=TRUE,
seed=1,
modality.column="modality.bimodal.adj"
)
## [1] "Checking... Matrix column (sample) names match sample metadata"
## [1] "Checking... Matrix row (feature) names match feature metadata"
## [1] "Checking... Matrix column (sample) names match sample metadata"
## [1] "Checking... Matrix row (feature) names match feature metadata"
plot.3 <- marvel$adhocPlot$PSI
grid.arrange(plot.1, plot.2, plot.3, nrow=1)
# Tabulate % per modality change
changes <- c("Explicit", "Implicit", "Restricted")
pct.conserved <- NULL
for(i in 1:length(changes)) {
# Subset relevant modality change
. <- marvel$DE$Modality[which(marvel$DE$Modality["modality.change"]==changes[i]), ]
# Subset ribosomal genes
gene_short_names.1 <- unique(grep("^RPL", .$gene_short_name, value=TRUE))
gene_short_names.2 <- unique(grep("^RPS", .$gene_short_name, value=TRUE))
# Compute % of ribosomal genes
n.conserved <- length(c(gene_short_names.1, gene_short_names.2))
pct.conserved[i] <- n.conserved/length(unique(.$gene_short_name)) * 100
}
results <- data.frame("modality.change"=changes,
"pct"=pct.conserved,
stringsAsFactors=FALSE
)
# Barplot
barplot(pct ~ modality.change, results)